#R version 4.2.1 (2022-06-23)
#Platform: x86_64-w64-mingw32/x64 (64-bit)
#Running under: Windows 10 x64 (build 19044)

# may need to unload some packages to make everything work 
#detach("package:ltm", unload=TRUE)
#detach("package:MASS", unload=TRUE)

library(dplyr) #version 1.0.9
library(readr) #version 2.1.2
library(stringr) #version 1.4.0

#install country standardization tool
library(devtools) #version 2.4.3
devtools::install_github("dsself/standardizecountries", force = T)
library(standard)

#inport v-dem data####
d1 <- read_rds("data/vdem_party.rds") %>% 
  mutate(countryname = country_panel(country_name, year)) %>% 
  select(countryname, year, v2pashname, v2paseatshare, v2panumbseat, v2patotalseat, v2pavote, 
         v2paclient, v2palocoff, v2paactcom, v2pasoctie, v2panom, v2padisa, v2paind, v2pagovsup) %>% 
  filter(v2pagovsup %in% c(0,1))

#gwf####
d2 <- read_csv("data/panel.csv") %>% 
  select(countryname, year, gwf_casename, gwf_regimetype, v2pssunpar, v2xps_party) %>% 
  mutate(v2pssunpar = ((v2pssunpar - min(v2pssunpar, na.rm = T))/(max(v2pssunpar, na.rm = T)-min(v2pssunpar, na.rm = T)))) %>% 
  filter(str_detect(gwf_regimetype, "military"))

gwf <- left_join(d2, d1, by = c("countryname", "year")) %>% 
  filter(!is.na(v2pashname))

gwf_dffa <- select(gwf, gwf_casename, year, v2palocoff, v2panom, v2pasoctie) %>% 
  na.omit() %>% unique()

gwf_fa <- select(gwf_dffa, v2palocoff, v2panom, v2pasoctie) 

fit_gwf <- factanal(gwf_fa, 1, rotation="varimax", scores = "regression")

out_gwf <- as_tibble(fit_gwf$scores) %>% 
  mutate(Factor1 = ((Factor1 - min(Factor1, na.rm = T))/(max(Factor1, na.rm = T)-min(Factor1, na.rm = T)))) %>% 
  rename(gwf_incumbent_pi = Factor1)

gwf_fa <- cbind(gwf_dffa, out_gwf) %>% 
  select(gwf_casename, year, gwf_incumbent_pi) 

gwf_final <- left_join(gwf, gwf_fa, by = c("gwf_casename", "year")) %>% 
  group_by(gwf_casename, year) %>% 
  mutate_at(.vars = vars(gwf_incumbent_pi), ~mean(., na.rm = T)) %>% 
  unique() %>% 
  mutate_if(is.numeric, ~ifelse(is.nan(.), NA, .)) %>% 
  ungroup() %>% 
  filter(!is.na(gwf_casename), str_detect(gwf_regimetype, "military")) %>%
  select(gwf_casename, year, gwf_incumbent_pi) %>% 
  unique() %>% arrange(desc(gwf_incumbent_pi)) %>% 
  write_csv("data/gwf_fa_pi.csv")


#par####
d2 <- read_csv("data/panel.csv") %>% 
  select(countryname, year, svolik_case, svolik_regime, v2pssunpar, v2xps_party) %>% 
  mutate(v2pssunpar = ((v2pssunpar - min(v2pssunpar, na.rm = T))/(max(v2pssunpar, na.rm = T)-min(v2pssunpar, na.rm = T)))) %>% 
  filter(str_detect(svolik_regime, "military"))

svolik <- left_join(d2, d1, by = c("countryname", "year")) %>% 
  filter(!is.na(v2pashname))

svolik_dffa <- select(svolik, svolik_case, year, v2palocoff, v2panom, v2pasoctie) %>% 
  na.omit() %>% unique()

svolik_fa <- select(svolik_dffa, v2palocoff, v2panom, v2pasoctie) 

fit_svolik <- factanal(svolik_fa, 1, rotation="varimax", scores = "regression")

out_svolik <- as_tibble(fit_svolik$scores) %>% 
  mutate(Factor1 = ((Factor1 - min(Factor1, na.rm = T))/(max(Factor1, na.rm = T)-min(Factor1, na.rm = T)))) %>% 
  rename(svolik_incumbent_pi = Factor1)

svolik_fa <- cbind(svolik_dffa, out_svolik) %>% 
  select(svolik_case, year, svolik_incumbent_pi) 

svolik_final <- left_join(svolik, svolik_fa, by = c("svolik_case", "year")) %>% 
  group_by(svolik_case, year) %>% 
  mutate_at(.vars = vars(svolik_incumbent_pi), ~mean(., na.rm = T)) %>% 
  unique() %>% 
  mutate_if(is.numeric, ~ifelse(is.nan(.), NA, .)) %>% 
  ungroup() %>% 
  filter(!is.na(svolik_case), str_detect(svolik_regime, "military")) %>%
  select(svolik_case, year, svolik_incumbent_pi) %>% 
  unique() %>% arrange(desc(svolik_incumbent_pi)) %>% 
  write_csv("data/svolik_fa_pi.csv")

#dd####
d2 <- read_csv("data/panel.csv") %>% 
  select(countryname, year, dd_case, dd_regime, v2pssunpar, v2xps_party) %>% 
  mutate(v2pssunpar = ((v2pssunpar - min(v2pssunpar, na.rm = T))/(max(v2pssunpar, na.rm = T)-min(v2pssunpar, na.rm = T)))) %>% 
  filter(str_detect(dd_regime, "military"))

dd <- left_join(d2, d1, by = c("countryname", "year")) %>% 
  filter(!is.na(v2pashname))

dd_dffa <- select(dd, dd_case, year, v2palocoff, v2panom, v2pasoctie) %>% 
  na.omit() %>% unique()

dd_fa <- select(dd_dffa, v2palocoff, v2panom, v2pasoctie) 

fit_dd <- factanal(dd_fa, 1, rotation="varimax", scores = "regression")

out_dd <- as_tibble(fit_dd$scores) %>% 
  mutate(Factor1 = ((Factor1 - min(Factor1, na.rm = T))/(max(Factor1, na.rm = T)-min(Factor1, na.rm = T)))) %>% 
  rename(dd_incumbent_pi = Factor1)

dd_fa <- cbind(dd_dffa, out_dd) %>% 
  select(dd_case, year, dd_incumbent_pi) 

dd_final <- left_join(dd, dd_fa, by = c("dd_case", "year")) %>% 
  group_by(dd_case, year) %>% 
  mutate_at(.vars = vars(dd_incumbent_pi), ~mean(., na.rm = T)) %>% 
  unique() %>% 
  mutate_if(is.numeric, ~ifelse(is.nan(.), NA, .)) %>% 
  ungroup() %>% 
  filter(!is.na(dd_case), str_detect(dd_regime, "military")) %>%
  select(dd_case, year, dd_incumbent_pi) %>% 
  unique() %>% arrange(desc(dd_incumbent_pi)) %>% 
  write_csv("data/dd_fa_pi.csv")

#wth####
d2 <- read_csv("data/panel.csv") %>% 
  select(countryname, year, wth_case, wth_regime, v2pssunpar, v2xps_party) %>% 
  mutate(v2pssunpar = ((v2pssunpar - min(v2pssunpar, na.rm = T))/(max(v2pssunpar, na.rm = T)-min(v2pssunpar, na.rm = T)))) %>% 
  filter(str_detect(wth_regime, "military"))

wth <- left_join(d2, d1, by = c("countryname", "year")) %>% 
  filter(!is.na(v2pashname))

wth_dffa <- select(wth, wth_case, year, v2palocoff, v2panom, v2pasoctie) %>% 
  na.omit() %>% unique()

wth_fa <- select(wth_dffa, v2palocoff, v2panom, v2pasoctie) 

fit_wth <- factanal(wth_fa, 1, rotation="varimax", scores = "regression")

out_wth <- as_tibble(fit_wth$scores) %>% 
  mutate(Factor1 = ((Factor1 - min(Factor1, na.rm = T))/(max(Factor1, na.rm = T)-min(Factor1, na.rm = T)))) %>% 
  rename(wth_incumbent_pi = Factor1)

wth_fa <- cbind(wth_dffa, out_wth) %>% 
  select(wth_case, year, wth_incumbent_pi) 

wth_final <- left_join(wth, wth_fa, by = c("wth_case", "year")) %>% 
  group_by(wth_case, year) %>% 
  mutate_at(.vars = vars(wth_incumbent_pi), ~mean(., na.rm = T)) %>% 
  unique() %>% 
  mutate_if(is.numeric, ~ifelse(is.nan(.), NA, .)) %>% 
  ungroup() %>% 
  filter(!is.na(wth_case), str_detect(wth_regime, "military")) %>%
  select(wth_case, year, wth_incumbent_pi) %>% 
  unique() %>% arrange(desc(wth_incumbent_pi)) %>% 
  write_csv("data/wth_fa_pi.csv")

#validation####

gwf_m <- select(gwf_dffa, v2palocoff, v2panom, v2pasoctie) %>% 
  summarise_all(~mean(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

gwf_sd <- select(gwf_dffa, v2palocoff, v2panom, v2pasoctie) %>% 
  summarise_all(~sd(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

svolik_m <- select(svolik_dffa, v2palocoff, v2panom, v2pasoctie) %>% 
  summarise_all(~mean(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

svolik_sd <- select(svolik_dffa, v2palocoff, v2panom, v2pasoctie) %>% 
  summarise_all(~sd(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

dd_m <- select(dd_dffa, v2palocoff, v2panom, v2pasoctie) %>% 
  summarise_all(~mean(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

dd_sd <- select(dd_dffa, v2palocoff, v2panom, v2pasoctie) %>% 
  summarise_all(~sd(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

wth_m <- select(wth_dffa, v2palocoff, v2panom, v2pasoctie) %>% 
  summarise_all(~mean(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

wth_sd <- select(wth_dffa, v2palocoff, v2panom, v2pasoctie) %>% 
  summarise_all(~sd(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

all_means <- rbind(gwf_m, svolik_m, dd_m, wth_m) %>% 
  mutate(dataset = c("GWF", "PAR", "DD", "WTH")) %>% 
  select(dataset, v2palocoff, v2panom, v2pasoctie)

all_sd <- rbind(gwf_sd, svolik_sd, dd_sd, wth_sd) %>% 
  mutate(dataset = c("GWF", "PAR", "DD", "WTH")) %>% 
  select(dataset, v2palocoff, v2panom, v2pasoctie)



#plots ####
library(scales)
gwf <- read_csv("data/panel.csv") %>% 
  select(countryname, gwf_casename, year, gwf_regimetype, v2xps_party) %>% 
  filter(str_detect(gwf_regimetype, "military")) %>% 
  left_join(gwf_final, by = c("gwf_casename", "year")) %>% 
  group_by(gwf_casename) %>% 
  mutate_at(.vars = vars(gwf_incumbent_pi), ~na.locf0(.)) %>% 
  group_by(countryname) %>% 
  mutate(gwf_incumbent_pi1 = ifelse(is.na(gwf_incumbent_pi), v2xps_party, gwf_incumbent_pi)) %>% 
  mutate(v2xps_party = ifelse(is.na(v2xps_party), 0, v2xps_party)) %>% 
  mutate(gwf_incumbent_pi2 = ifelse(is.na(gwf_incumbent_pi), v2xps_party, gwf_incumbent_pi)) 

gwf1 <- ggplot(gwf, aes(x = gwf_incumbent_pi1)) +  
  geom_histogram(aes(y = (..count..)/sum(..count..))) +
  stat_bin(aes(y=(..count..)/sum(..count..))) +
  theme_bw() + 
  ylab("Dataset: GWF \n Percentage") + 
  xlab("") +
  scale_y_continuous(labels = percent)

gwf2 <- ggplot(gwf, aes(x = gwf_incumbent_pi2)) +  
  geom_histogram(aes(y = (..count..)/sum(..count..))) +
  stat_bin(aes(y=(..count..)/sum(..count..))) +
  theme_bw() + 
  ylab("Dataset: GWF \n Percentage") + 
  xlab("") +
  scale_y_continuous(labels = percent)

par <- read_csv("data/panel.csv") %>% 
  select(countryname, svolik_case, year, svolik_regime, v2xps_party) %>% 
  filter(str_detect(svolik_regime, "military")) %>% 
  left_join(svolik_final, by = c("svolik_case", "year")) %>% 
  group_by(svolik_case) %>% 
  mutate_at(.vars = vars(svolik_incumbent_pi), ~na.locf0(.)) %>% 
  group_by(countryname) %>% 
  mutate(svolik_incumbent_pi1 = ifelse(is.na(svolik_incumbent_pi), v2xps_party, svolik_incumbent_pi)) %>% 
  mutate(v2xps_party = ifelse(is.na(v2xps_party), 0, v2xps_party)) %>% 
  mutate(svolik_incumbent_pi2 = ifelse(is.na(svolik_incumbent_pi), v2xps_party, svolik_incumbent_pi)) 

par1 <- ggplot(par, aes(x = svolik_incumbent_pi1)) +  
  geom_histogram(aes(y = (..count..)/sum(..count..))) +
  stat_bin(aes(y=(..count..)/sum(..count..))) +
  theme_bw() + 
  ylab("Dataset: PAR") + 
  xlab("") +
  scale_y_continuous(labels = percent)

par2 <- ggplot(par, aes(x = svolik_incumbent_pi2)) +  
  geom_histogram(aes(y = (..count..)/sum(..count..))) +
  stat_bin(aes(y=(..count..)/sum(..count..))) +
  theme_bw() + 
  ylab("Dataset: PAR") + 
  xlab("") +
  scale_y_continuous(labels = percent)

dd <- read_csv("data/panel.csv") %>% 
  select(countryname, dd_case, year, dd_regime, v2xps_party) %>% 
  filter(str_detect(dd_regime, "military")) %>% 
  left_join(dd_final, by = c("dd_case", "year")) %>% 
  group_by(dd_case) %>% 
  mutate_at(.vars = vars(dd_incumbent_pi), ~na.locf0(.)) %>% 
  group_by(countryname) %>% 
  mutate(dd_incumbent_pi1 = ifelse(is.na(dd_incumbent_pi), v2xps_party, dd_incumbent_pi)) %>% 
  mutate(v2xps_party = ifelse(is.na(v2xps_party), 0, v2xps_party)) %>% 
  mutate(dd_incumbent_pi2 = ifelse(is.na(dd_incumbent_pi), v2xps_party, dd_incumbent_pi)) 

dd1 <- ggplot(dd, aes(x = dd_incumbent_pi1)) +  
  geom_histogram(aes(y = (..count..)/sum(..count..))) +
  stat_bin(aes(y=(..count..)/sum(..count..))) +
  theme_bw() + 
  ylab("Dataset: DD \n Percentage") + 
  xlab("Party Institutionalization") +
  scale_y_continuous(labels = percent)

dd2 <- ggplot(dd, aes(x = dd_incumbent_pi2)) +  
  geom_histogram(aes(y = (..count..)/sum(..count..))) +
  stat_bin(aes(y=(..count..)/sum(..count..))) +
  theme_bw() + 
  ylab("Dataset: DD \n Percentage") + 
  xlab("Party Institutionalization") +
  scale_y_continuous(labels = percent)

wth <- read_csv("data/panel.csv") %>% 
  select(countryname, wth_case, year, wth_regime, v2xps_party) %>% 
  filter(str_detect(wth_regime, "military")) %>% 
  left_join(wth_final, by = c("wth_case", "year")) %>% 
  group_by(wth_case) %>% 
  mutate_at(.vars = vars(wth_incumbent_pi), ~na.locf0(.)) %>% 
  group_by(countryname) %>% 
  mutate(wth_incumbent_pi1 = ifelse(is.na(wth_incumbent_pi), v2xps_party, wth_incumbent_pi)) %>% 
  mutate(v2xps_party = ifelse(is.na(v2xps_party), 0, v2xps_party)) %>% 
  mutate(wth_incumbent_pi2 = ifelse(is.na(wth_incumbent_pi), v2xps_party, wth_incumbent_pi)) 

wth1 <- ggplot(wth, aes(x = wth_incumbent_pi1)) +  
  geom_histogram(aes(y = (..count..)/sum(..count..))) +
  stat_bin(aes(y=(..count..)/sum(..count..))) +
  theme_bw() + 
  ylab("Dataset: WTH") + 
  xlab("Party Institutionalization") +
  scale_y_continuous(labels = percent)

wth2 <- ggplot(wth, aes(x = wth_incumbent_pi2)) +  
  geom_histogram(aes(y = (..count..)/sum(..count..))) +
  stat_bin(aes(y=(..count..)/sum(..count..))) +
  theme_bw() + 
  ylab("Dataset: WTH") + 
  xlab("Party Institutionalization") +
  scale_y_continuous(labels = percent)

ggarrange(gwf1, par1, dd1, wth1, ncol = 2, nrow = 2) +
  ggsave(filename = "figures/appendix_pi1.jpg", dpi = 1000, width = 6.5, height = 6.5)  

ggarrange(gwf2, par2, dd2, wth2, ncol = 2, nrow = 2) +
  ggsave(filename = "figures/appendix_pi2.jpg", dpi = 1000, width = 6.5, height = 6.5)